{
 "cells": [
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "7d3b19ce",
   "metadata": {},
   "source": [
    "## Running Async Transformations in Jupyter"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "1f17f05a",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Requirement already satisfied: refuel-autolabel[all] in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (0.0.12)\n",
      "Requirement already satisfied: loguru>=0.5.0 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from refuel-autolabel[all]) (0.5.3)\n",
      "Requirement already satisfied: numpy>=1.23.0 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from refuel-autolabel[all]) (1.23.3)\n",
      "Requirement already satisfied: requests>=2.27.0 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from refuel-autolabel[all]) (2.27.1)\n",
      "Requirement already satisfied: datasets>=2.7.0 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from refuel-autolabel[all]) (2.7.1)\n",
      "Requirement already satisfied: langchain==0.0.210 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from refuel-autolabel[all]) (0.0.210)\n",
      "Requirement already satisfied: nervaluate>=0.1.8 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from refuel-autolabel[all]) (0.1.8)\n",
      "Requirement already satisfied: pandas>=1.3.0 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from refuel-autolabel[all]) (1.3.5)\n",
      "Requirement already satisfied: scikit-learn>=1.0.0 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from refuel-autolabel[all]) (1.0.2)\n",
      "Requirement already satisfied: tenacity>=8.2.2 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from refuel-autolabel[all]) (8.2.2)\n",
      "Requirement already satisfied: SQLAlchemy>=2.0.19 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from refuel-autolabel[all]) (2.0.19)\n",
      "Requirement already satisfied: regex>=2023.6.3 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from refuel-autolabel[all]) (2023.6.3)\n",
      "Requirement already satisfied: rich>=13.3.5 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from refuel-autolabel[all]) (13.3.5)\n",
      "Requirement already satisfied: scipy>=1.10.1 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from refuel-autolabel[all]) (1.10.1)\n",
      "Requirement already satisfied: pydantic>=1.10.9 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from refuel-autolabel[all]) (1.10.9)\n",
      "Requirement already satisfied: torch>=1.10.0 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from refuel-autolabel[all]) (1.10.1)\n",
      "Requirement already satisfied: matplotlib>=3.5.0 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from refuel-autolabel[all]) (3.5.1)\n",
      "Requirement already satisfied: wget>=3.2 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from refuel-autolabel[all]) (3.2)\n",
      "Requirement already satisfied: ipywidgets==8.0.6 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from refuel-autolabel[all]) (8.0.6)\n",
      "Requirement already satisfied: jsonschema>=4.17.3 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from refuel-autolabel[all]) (4.17.3)\n",
      "Requirement already satisfied: tabulate>=0.9.0 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from refuel-autolabel[all]) (0.9.0)\n",
      "Requirement already satisfied: typer[all]>=0.9.0 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from refuel-autolabel[all]) (0.9.0)\n",
      "Requirement already satisfied: simple-term-menu>=1.6.1 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from refuel-autolabel[all]) (1.6.1)\n",
      "Requirement already satisfied: black in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from refuel-autolabel[all]) (22.3.0)\n",
      "Requirement already satisfied: bumpver in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from refuel-autolabel[all]) (2023.1121)\n",
      "Requirement already satisfied: pip-tools in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from refuel-autolabel[all]) (6.13.0)\n",
      "Requirement already satisfied: pytest in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from refuel-autolabel[all]) (7.2.0)\n",
      "Collecting pytest-asyncio (from refuel-autolabel[all])\n",
      "  Downloading pytest_asyncio-0.21.1-py3-none-any.whl (13 kB)\n",
      "Requirement already satisfied: pytest-mock in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from refuel-autolabel[all]) (3.8.2)\n",
      "Requirement already satisfied: pre-commit in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from refuel-autolabel[all]) (2.20.0)\n",
      "Requirement already satisfied: openai>=0.27.4 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from refuel-autolabel[all]) (0.27.4)\n",
      "Requirement already satisfied: tiktoken>=0.3.3 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from refuel-autolabel[all]) (0.3.3)\n",
      "Requirement already satisfied: anthropic==0.2.6 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from refuel-autolabel[all]) (0.2.6)\n",
      "Requirement already satisfied: transformers>=4.25.0 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from refuel-autolabel[all]) (4.25.1)\n",
      "Requirement already satisfied: google-cloud-aiplatform>=1.25.0 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from refuel-autolabel[all]) (1.25.0)\n",
      "Requirement already satisfied: cohere>=4.11.2 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from refuel-autolabel[all]) (4.11.2)\n",
      "Requirement already satisfied: sentence-transformers in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from refuel-autolabel[all]) (2.2.2)\n",
      "Collecting pdfplumber>=0.10.2 (from refuel-autolabel[all])\n",
      "  Downloading pdfplumber-0.10.2-py3-none-any.whl (47 kB)\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m47.5/47.5 kB\u001b[0m \u001b[31m1.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hRequirement already satisfied: pdf2image>=1.16.3 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from refuel-autolabel[all]) (1.16.3)\n",
      "Requirement already satisfied: pytesseract>=0.3.10 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from refuel-autolabel[all]) (0.3.10)\n",
      "Requirement already satisfied: bs4 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from refuel-autolabel[all]) (0.0.1)\n",
      "Requirement already satisfied: httpx in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from refuel-autolabel[all]) (0.21.3)\n",
      "Requirement already satisfied: fake-useragent in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from refuel-autolabel[all]) (1.2.1)\n",
      "Requirement already satisfied: tokenizers in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from anthropic==0.2.6->refuel-autolabel[all]) (0.13.2)\n",
      "Requirement already satisfied: aiohttp in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from anthropic==0.2.6->refuel-autolabel[all]) (3.8.4)\n",
      "Requirement already satisfied: ipykernel>=4.5.1 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from ipywidgets==8.0.6->refuel-autolabel[all]) (6.6.1)\n",
      "Requirement already satisfied: ipython>=6.1.0 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from ipywidgets==8.0.6->refuel-autolabel[all]) (7.31.0)\n",
      "Requirement already satisfied: traitlets>=4.3.1 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from ipywidgets==8.0.6->refuel-autolabel[all]) (5.1.1)\n",
      "Requirement already satisfied: widgetsnbextension~=4.0.7 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from ipywidgets==8.0.6->refuel-autolabel[all]) (4.0.7)\n",
      "Requirement already satisfied: jupyterlab-widgets~=3.0.7 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from ipywidgets==8.0.6->refuel-autolabel[all]) (3.0.7)\n",
      "Requirement already satisfied: PyYAML>=5.4.1 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from langchain==0.0.210->refuel-autolabel[all]) (6.0)\n",
      "Requirement already satisfied: async-timeout<5.0.0,>=4.0.0 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from langchain==0.0.210->refuel-autolabel[all]) (4.0.2)\n",
      "Requirement already satisfied: dataclasses-json<0.6.0,>=0.5.7 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from langchain==0.0.210->refuel-autolabel[all]) (0.5.7)\n",
      "Requirement already satisfied: langchainplus-sdk>=0.0.17 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from langchain==0.0.210->refuel-autolabel[all]) (0.0.17)\n",
      "Requirement already satisfied: numexpr<3.0.0,>=2.8.4 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from langchain==0.0.210->refuel-autolabel[all]) (2.8.4)\n",
      "Requirement already satisfied: openapi-schema-pydantic<2.0,>=1.2 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from langchain==0.0.210->refuel-autolabel[all]) (1.2.4)\n",
      "Requirement already satisfied: backoff<3.0,>=2.0 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from cohere>=4.11.2->refuel-autolabel[all]) (2.2.1)\n",
      "Requirement already satisfied: importlib_metadata<7.0,>=6.0 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from cohere>=4.11.2->refuel-autolabel[all]) (6.6.0)\n",
      "Requirement already satisfied: pyarrow>=6.0.0 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from datasets>=2.7.0->refuel-autolabel[all]) (9.0.0)\n",
      "Requirement already satisfied: dill<0.3.7 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from datasets>=2.7.0->refuel-autolabel[all]) (0.3.4)\n",
      "Requirement already satisfied: tqdm>=4.62.1 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from datasets>=2.7.0->refuel-autolabel[all]) (4.64.1)\n",
      "Requirement already satisfied: xxhash in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from datasets>=2.7.0->refuel-autolabel[all]) (3.1.0)\n",
      "Requirement already satisfied: multiprocess in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from datasets>=2.7.0->refuel-autolabel[all]) (0.70.12.2)\n",
      "Requirement already satisfied: fsspec[http]>=2021.11.1 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from datasets>=2.7.0->refuel-autolabel[all]) (2022.8.2)\n",
      "Requirement already satisfied: huggingface-hub<1.0.0,>=0.2.0 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from datasets>=2.7.0->refuel-autolabel[all]) (0.13.3)\n",
      "Requirement already satisfied: packaging in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from datasets>=2.7.0->refuel-autolabel[all]) (21.3)\n",
      "Requirement already satisfied: responses<0.19 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from datasets>=2.7.0->refuel-autolabel[all]) (0.18.0)\n",
      "Requirement already satisfied: google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,<3.0.0dev,>=1.32.0 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from google-cloud-aiplatform>=1.25.0->refuel-autolabel[all]) (2.10.1)\n",
      "Requirement already satisfied: proto-plus<2.0.0dev,>=1.22.0 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from google-cloud-aiplatform>=1.25.0->refuel-autolabel[all]) (1.22.2)\n",
      "Requirement already satisfied: protobuf!=3.20.0,!=3.20.1,!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<5.0.0dev,>=3.19.5 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from google-cloud-aiplatform>=1.25.0->refuel-autolabel[all]) (4.23.0)\n",
      "Requirement already satisfied: google-cloud-storage<3.0.0dev,>=1.32.0 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from google-cloud-aiplatform>=1.25.0->refuel-autolabel[all]) (2.7.0)\n",
      "Requirement already satisfied: google-cloud-bigquery<4.0.0dev,>=1.15.0 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from google-cloud-aiplatform>=1.25.0->refuel-autolabel[all]) (3.10.0)\n",
      "Requirement already satisfied: google-cloud-resource-manager<3.0.0dev,>=1.3.3 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from google-cloud-aiplatform>=1.25.0->refuel-autolabel[all]) (1.6.3)\n",
      "Requirement already satisfied: shapely<2.0.0 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from google-cloud-aiplatform>=1.25.0->refuel-autolabel[all]) (1.8.5.post1)\n",
      "Requirement already satisfied: attrs>=17.4.0 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from jsonschema>=4.17.3->refuel-autolabel[all]) (20.3.0)\n",
      "Requirement already satisfied: importlib-resources>=1.4.0 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from jsonschema>=4.17.3->refuel-autolabel[all]) (5.4.0)\n",
      "Requirement already satisfied: pkgutil-resolve-name>=1.3.10 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from jsonschema>=4.17.3->refuel-autolabel[all]) (1.3.10)\n",
      "Requirement already satisfied: pyrsistent!=0.17.0,!=0.17.1,!=0.17.2,>=0.14.0 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from jsonschema>=4.17.3->refuel-autolabel[all]) (0.18.0)\n",
      "Requirement already satisfied: cycler>=0.10 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from matplotlib>=3.5.0->refuel-autolabel[all]) (0.11.0)\n",
      "Requirement already satisfied: fonttools>=4.22.0 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from matplotlib>=3.5.0->refuel-autolabel[all]) (4.28.5)\n",
      "Requirement already satisfied: kiwisolver>=1.0.1 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from matplotlib>=3.5.0->refuel-autolabel[all]) (1.3.2)\n",
      "Requirement already satisfied: pillow>=6.2.0 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from matplotlib>=3.5.0->refuel-autolabel[all]) (9.4.0)\n",
      "Requirement already satisfied: pyparsing>=2.2.1 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from matplotlib>=3.5.0->refuel-autolabel[all]) (2.4.7)\n",
      "Requirement already satisfied: python-dateutil>=2.7 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from matplotlib>=3.5.0->refuel-autolabel[all]) (2.8.2)\n",
      "Requirement already satisfied: pytz>=2017.3 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from pandas>=1.3.0->refuel-autolabel[all]) (2021.3)\n",
      "Requirement already satisfied: pdfminer.six==20221105 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from pdfplumber>=0.10.2->refuel-autolabel[all]) (20221105)\n",
      "Collecting pypdfium2>=4.18.0 (from pdfplumber>=0.10.2->refuel-autolabel[all])\n",
      "  Downloading pypdfium2-4.18.0-py3-none-macosx_11_0_arm64.whl (2.8 MB)\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.8/2.8 MB\u001b[0m \u001b[31m32.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n",
      "\u001b[?25hRequirement already satisfied: charset-normalizer>=2.0.0 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from pdfminer.six==20221105->pdfplumber>=0.10.2->refuel-autolabel[all]) (2.0.9)\n",
      "Requirement already satisfied: cryptography>=36.0.0 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from pdfminer.six==20221105->pdfplumber>=0.10.2->refuel-autolabel[all]) (36.0.1)\n",
      "Requirement already satisfied: typing-extensions>=4.2.0 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from pydantic>=1.10.9->refuel-autolabel[all]) (4.4.0)\n",
      "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from requests>=2.27.0->refuel-autolabel[all]) (1.26.14)\n",
      "Requirement already satisfied: certifi>=2017.4.17 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from requests>=2.27.0->refuel-autolabel[all]) (2021.10.8)\n",
      "Requirement already satisfied: idna<4,>=2.5 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from requests>=2.27.0->refuel-autolabel[all]) (2.10)\n",
      "Requirement already satisfied: markdown-it-py<3.0.0,>=2.2.0 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from rich>=13.3.5->refuel-autolabel[all]) (2.2.0)\n",
      "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from rich>=13.3.5->refuel-autolabel[all]) (2.15.1)\n",
      "Requirement already satisfied: joblib>=0.11 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from scikit-learn>=1.0.0->refuel-autolabel[all]) (1.2.0)\n",
      "Requirement already satisfied: threadpoolctl>=2.0.0 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from scikit-learn>=1.0.0->refuel-autolabel[all]) (3.0.0)\n",
      "Requirement already satisfied: filelock in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from transformers>=4.25.0->refuel-autolabel[all]) (3.4.2)\n",
      "Requirement already satisfied: click<9.0.0,>=7.1.1 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from typer[all]>=0.9.0->refuel-autolabel[all]) (8.1.3)\n",
      "Requirement already satisfied: colorama<0.5.0,>=0.4.3 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from typer[all]>=0.9.0->refuel-autolabel[all]) (0.4.5)\n",
      "Requirement already satisfied: shellingham<2.0.0,>=1.3.0 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from typer[all]>=0.9.0->refuel-autolabel[all]) (1.5.0.post1)\n",
      "Requirement already satisfied: platformdirs>=2 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from black->refuel-autolabel[all]) (2.5.1)\n",
      "Requirement already satisfied: pathspec>=0.9.0 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from black->refuel-autolabel[all]) (0.9.0)\n",
      "Requirement already satisfied: mypy-extensions>=0.4.3 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from black->refuel-autolabel[all]) (0.4.3)\n",
      "Requirement already satisfied: tomli>=1.1.0 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from black->refuel-autolabel[all]) (2.0.1)\n",
      "Requirement already satisfied: beautifulsoup4 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from bs4->refuel-autolabel[all]) (4.10.0)\n",
      "Requirement already satisfied: pathlib2 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from bumpver->refuel-autolabel[all]) (2.3.7.post1)\n",
      "Requirement already satisfied: toml in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from bumpver->refuel-autolabel[all]) (0.10.2)\n",
      "Requirement already satisfied: lexid in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from bumpver->refuel-autolabel[all]) (2021.1006)\n",
      "Requirement already satisfied: looseversion in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from bumpver->refuel-autolabel[all]) (1.1.2)\n",
      "Requirement already satisfied: sniffio in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from httpx->refuel-autolabel[all]) (1.2.0)\n",
      "Requirement already satisfied: rfc3986[idna2008]<2,>=1.3 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from httpx->refuel-autolabel[all]) (1.5.0)\n",
      "Requirement already satisfied: httpcore<0.15.0,>=0.14.0 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from httpx->refuel-autolabel[all]) (0.14.5)\n",
      "Requirement already satisfied: build in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from pip-tools->refuel-autolabel[all]) (0.10.0)\n",
      "Requirement already satisfied: pip>=22.2 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from pip-tools->refuel-autolabel[all]) (23.1.2)\n",
      "Requirement already satisfied: setuptools in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from pip-tools->refuel-autolabel[all]) (65.6.3)\n",
      "Requirement already satisfied: wheel in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from pip-tools->refuel-autolabel[all]) (0.40.0)\n",
      "Requirement already satisfied: cfgv>=2.0.0 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from pre-commit->refuel-autolabel[all]) (3.3.1)\n",
      "Requirement already satisfied: identify>=1.0.0 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from pre-commit->refuel-autolabel[all]) (2.5.9)\n",
      "Requirement already satisfied: nodeenv>=0.11.1 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from pre-commit->refuel-autolabel[all]) (1.7.0)\n",
      "Requirement already satisfied: virtualenv>=20.0.8 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from pre-commit->refuel-autolabel[all]) (20.16.7)\n",
      "Requirement already satisfied: iniconfig in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from pytest->refuel-autolabel[all]) (1.1.1)\n",
      "Requirement already satisfied: pluggy<2.0,>=0.12 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from pytest->refuel-autolabel[all]) (1.0.0)\n",
      "Requirement already satisfied: exceptiongroup>=1.0.0rc8 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from pytest->refuel-autolabel[all]) (1.0.4)\n",
      "Requirement already satisfied: torchvision in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from sentence-transformers->refuel-autolabel[all]) (0.11.2)\n",
      "Requirement already satisfied: nltk in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from sentence-transformers->refuel-autolabel[all]) (3.6.6)\n",
      "Requirement already satisfied: sentencepiece in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from sentence-transformers->refuel-autolabel[all]) (0.1.96)\n",
      "Requirement already satisfied: multidict<7.0,>=4.5 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from aiohttp->anthropic==0.2.6->refuel-autolabel[all]) (5.2.0)\n",
      "Requirement already satisfied: yarl<2.0,>=1.0 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from aiohttp->anthropic==0.2.6->refuel-autolabel[all]) (1.7.2)\n",
      "Requirement already satisfied: frozenlist>=1.1.1 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from aiohttp->anthropic==0.2.6->refuel-autolabel[all]) (1.2.0)\n",
      "Requirement already satisfied: aiosignal>=1.1.2 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from aiohttp->anthropic==0.2.6->refuel-autolabel[all]) (1.2.0)\n",
      "Requirement already satisfied: marshmallow<4.0.0,>=3.3.0 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from dataclasses-json<0.6.0,>=0.5.7->langchain==0.0.210->refuel-autolabel[all]) (3.17.1)\n",
      "Requirement already satisfied: marshmallow-enum<2.0.0,>=1.5.1 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from dataclasses-json<0.6.0,>=0.5.7->langchain==0.0.210->refuel-autolabel[all]) (1.5.1)\n",
      "Requirement already satisfied: typing-inspect>=0.4.0 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from dataclasses-json<0.6.0,>=0.5.7->langchain==0.0.210->refuel-autolabel[all]) (0.8.0)\n",
      "Requirement already satisfied: googleapis-common-protos<2.0dev,>=1.56.2 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,<3.0.0dev,>=1.32.0->google-cloud-aiplatform>=1.25.0->refuel-autolabel[all]) (1.56.4)\n",
      "Requirement already satisfied: google-auth<3.0dev,>=1.25.0 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,<3.0.0dev,>=1.32.0->google-cloud-aiplatform>=1.25.0->refuel-autolabel[all]) (2.6.0)\n",
      "Requirement already satisfied: grpcio<2.0dev,>=1.33.2 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,<3.0.0dev,>=1.32.0->google-cloud-aiplatform>=1.25.0->refuel-autolabel[all]) (1.54.0)\n",
      "Requirement already satisfied: grpcio-status<2.0dev,>=1.33.2 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,<3.0.0dev,>=1.32.0->google-cloud-aiplatform>=1.25.0->refuel-autolabel[all]) (1.54.0)\n",
      "Requirement already satisfied: google-cloud-core<3.0.0dev,>=1.6.0 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from google-cloud-bigquery<4.0.0dev,>=1.15.0->google-cloud-aiplatform>=1.25.0->refuel-autolabel[all]) (2.3.2)\n",
      "Requirement already satisfied: google-resumable-media<3.0dev,>=0.6.0 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from google-cloud-bigquery<4.0.0dev,>=1.15.0->google-cloud-aiplatform>=1.25.0->refuel-autolabel[all]) (2.4.1)\n",
      "Requirement already satisfied: grpc-google-iam-v1<1.0.0dev,>=0.12.4 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from google-cloud-resource-manager<3.0.0dev,>=1.3.3->google-cloud-aiplatform>=1.25.0->refuel-autolabel[all]) (0.12.6)\n",
      "Requirement already satisfied: h11<0.13,>=0.11 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from httpcore<0.15.0,>=0.14.0->httpx->refuel-autolabel[all]) (0.12.0)\n",
      "Requirement already satisfied: anyio==3.* in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from httpcore<0.15.0,>=0.14.0->httpx->refuel-autolabel[all]) (3.4.0)\n",
      "Requirement already satisfied: zipp>=0.5 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from importlib_metadata<7.0,>=6.0->cohere>=4.11.2->refuel-autolabel[all]) (3.6.0)\n",
      "Requirement already satisfied: debugpy<2.0,>=1.0.0 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from ipykernel>=4.5.1->ipywidgets==8.0.6->refuel-autolabel[all]) (1.5.1)\n",
      "Requirement already satisfied: jupyter-client<8.0 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from ipykernel>=4.5.1->ipywidgets==8.0.6->refuel-autolabel[all]) (7.1.0)\n",
      "Requirement already satisfied: tornado<7.0,>=4.2 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from ipykernel>=4.5.1->ipywidgets==8.0.6->refuel-autolabel[all]) (6.1)\n",
      "Requirement already satisfied: matplotlib-inline<0.2.0,>=0.1.0 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from ipykernel>=4.5.1->ipywidgets==8.0.6->refuel-autolabel[all]) (0.1.3)\n",
      "Requirement already satisfied: nest-asyncio in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from ipykernel>=4.5.1->ipywidgets==8.0.6->refuel-autolabel[all]) (1.5.4)\n",
      "Requirement already satisfied: appnope in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from ipykernel>=4.5.1->ipywidgets==8.0.6->refuel-autolabel[all]) (0.1.2)\n",
      "Requirement already satisfied: jedi>=0.16 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets==8.0.6->refuel-autolabel[all]) (0.18.1)\n",
      "Requirement already satisfied: decorator in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets==8.0.6->refuel-autolabel[all]) (5.1.1)\n",
      "Requirement already satisfied: pickleshare in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets==8.0.6->refuel-autolabel[all]) (0.7.5)\n",
      "Requirement already satisfied: prompt-toolkit!=3.0.0,!=3.0.1,<3.1.0,>=2.0.0 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets==8.0.6->refuel-autolabel[all]) (3.0.39)\n",
      "Requirement already satisfied: backcall in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets==8.0.6->refuel-autolabel[all]) (0.2.0)\n",
      "Requirement already satisfied: pexpect>4.3 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets==8.0.6->refuel-autolabel[all]) (4.8.0)\n",
      "Requirement already satisfied: mdurl~=0.1 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from markdown-it-py<3.0.0,>=2.2.0->rich>=13.3.5->refuel-autolabel[all]) (0.1.2)\n",
      "Requirement already satisfied: six>=1.5 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from python-dateutil>=2.7->matplotlib>=3.5.0->refuel-autolabel[all]) (1.16.0)\n",
      "Requirement already satisfied: distlib<1,>=0.3.6 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from virtualenv>=20.0.8->pre-commit->refuel-autolabel[all]) (0.3.6)\n",
      "Requirement already satisfied: soupsieve>1.2 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from beautifulsoup4->bs4->refuel-autolabel[all]) (2.3.1)\n",
      "Requirement already satisfied: pyproject_hooks in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from build->pip-tools->refuel-autolabel[all]) (1.0.0)\n",
      "Requirement already satisfied: cffi>=1.12 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from cryptography>=36.0.0->pdfminer.six==20221105->pdfplumber>=0.10.2->refuel-autolabel[all]) (1.15.0)\n",
      "Requirement already satisfied: cachetools<6.0,>=2.0.0 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from google-auth<3.0dev,>=1.25.0->google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,<3.0.0dev,>=1.32.0->google-cloud-aiplatform>=1.25.0->refuel-autolabel[all]) (4.2.4)\n",
      "Requirement already satisfied: pyasn1-modules>=0.2.1 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from google-auth<3.0dev,>=1.25.0->google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,<3.0.0dev,>=1.32.0->google-cloud-aiplatform>=1.25.0->refuel-autolabel[all]) (0.2.8)\n",
      "Requirement already satisfied: rsa<5,>=3.1.4 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from google-auth<3.0dev,>=1.25.0->google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,<3.0.0dev,>=1.32.0->google-cloud-aiplatform>=1.25.0->refuel-autolabel[all]) (4.8)\n",
      "Requirement already satisfied: google-crc32c<2.0dev,>=1.0 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from google-resumable-media<3.0dev,>=0.6.0->google-cloud-bigquery<4.0.0dev,>=1.15.0->google-cloud-aiplatform>=1.25.0->refuel-autolabel[all]) (1.5.0)\n",
      "Requirement already satisfied: parso<0.9.0,>=0.8.0 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from jedi>=0.16->ipython>=6.1.0->ipywidgets==8.0.6->refuel-autolabel[all]) (0.8.3)\n",
      "Requirement already satisfied: entrypoints in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from jupyter-client<8.0->ipykernel>=4.5.1->ipywidgets==8.0.6->refuel-autolabel[all]) (0.3)\n",
      "Requirement already satisfied: jupyter-core>=4.6.0 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from jupyter-client<8.0->ipykernel>=4.5.1->ipywidgets==8.0.6->refuel-autolabel[all]) (4.9.1)\n",
      "Requirement already satisfied: pyzmq>=13 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from jupyter-client<8.0->ipykernel>=4.5.1->ipywidgets==8.0.6->refuel-autolabel[all]) (22.3.0)\n",
      "Requirement already satisfied: ptyprocess>=0.5 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from pexpect>4.3->ipython>=6.1.0->ipywidgets==8.0.6->refuel-autolabel[all]) (0.7.0)\n",
      "Requirement already satisfied: wcwidth in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from prompt-toolkit!=3.0.0,!=3.0.1,<3.1.0,>=2.0.0->ipython>=6.1.0->ipywidgets==8.0.6->refuel-autolabel[all]) (0.2.5)\n",
      "Requirement already satisfied: pycparser in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from cffi>=1.12->cryptography>=36.0.0->pdfminer.six==20221105->pdfplumber>=0.10.2->refuel-autolabel[all]) (2.21)\n",
      "Requirement already satisfied: pyasn1<0.5.0,>=0.4.6 in /Users/nihit/Documents/refuel-ai.nosync/virtualenv/lib/python3.8/site-packages (from pyasn1-modules>=0.2.1->google-auth<3.0dev,>=1.25.0->google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,<3.0.0dev,>=1.32.0->google-cloud-aiplatform>=1.25.0->refuel-autolabel[all]) (0.4.8)\n",
      "Installing collected packages: pypdfium2, pytest-asyncio, pdfplumber\n",
      "  Attempting uninstall: pdfplumber\n",
      "    Found existing installation: pdfplumber 0.8.0\n",
      "    Uninstalling pdfplumber-0.8.0:\n",
      "      Successfully uninstalled pdfplumber-0.8.0\n",
      "Successfully installed pdfplumber-0.10.2 pypdfium2-4.18.0 pytest-asyncio-0.21.1\n",
      "\n",
      "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.1.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.2.1\u001b[0m\n",
      "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n"
     ]
    }
   ],
   "source": [
    "# Use the %pip magic (not !pip) so packages are installed into the environment of the running kernel.\n",
    "%pip install \"refuel-autolabel[all]\"\n",
    "%pip install beautifulsoup4 httpx fake_useragent"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "aea97f23",
   "metadata": {},
   "source": [
    "## Finding the State of a National Park using Autolabel\n",
    "\n",
    "We will use Autolabel to find the state a national park is located in, given the URL of a webpage about the park (for example, its National Park Service site). First, we will use a transform to extract the content of the webpage. Then, using that content, we will frame this as a question_answering task to extract the park's state from the webpage."
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "2eab19b6",
   "metadata": {},
   "source": [
    "Notice the \"transforms\" part of the config. Here, the webpage transform reads the \"url\" column and extracts the text of each webpage. The extracted text is written to the column named \"content\", as specified in the \"output_columns\" part of the transform. Next, the \"example_template\" references this \"content\" column so that the webpage text is sent to the LLM along with the question about the state of the national park."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "50e7446e",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Autolabel task configuration: a question-answering task over scraped webpage text.\n",
    "config = {\n",
    "    \"task_name\": \"NationalPark\",\n",
    "    \"task_type\": \"question_answering\",\n",
    "    \"dataset\": {\n",
    "    },\n",
    "    \"model\": {\n",
    "        \"provider\": \"openai\",\n",
    "        \"name\": \"gpt-3.5-turbo\",\n",
    "    },\n",
    "    # Read each page listed in the \"url\" column and store its text in a new \"content\" column.\n",
    "    \"transforms\": [{\n",
    "        \"name\": \"webpage_transform\",\n",
    "        \"params\": {\n",
    "            \"url_column\": \"url\",\n",
    "        },\n",
    "        \"output_columns\": {\n",
    "            \"content_column\": \"content\",\n",
    "        },\n",
    "    }],\n",
    "    \"prompt\": {\n",
    "        \"task_guidelines\": \"You are an expert at understanding websites of national parks. You will be given a webpage about a national park. Answer with the US State that the national park is located in.\",\n",
    "        \"output_guidelines\": \"Answer in one word the state that the national park is located in.\",\n",
    "        # {content} refers to the column produced by the transform above; the newline\n",
    "        # separates the page text from the \"State:\" answer cue.\n",
    "        \"example_template\": \"Content of webpage: {content}\\nState:\",\n",
    "    },\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "201b498e",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "from getpass import getpass\n",
    "\n",
    "# Keep the key out of the notebook: reuse OPENAI_API_KEY if it is already set,\n",
    "# otherwise prompt for it (the typed value is not echoed or stored in the notebook).\n",
    "if not os.environ.get(\"OPENAI_API_KEY\"):\n",
    "    os.environ[\"OPENAI_API_KEY\"] = getpass(\"OpenAI API key: \")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "fc6be6ac",
   "metadata": {},
   "outputs": [],
   "source": [
    "from autolabel import AutolabelDataset, LabelingAgent\n",
    "\n",
    "agent = LabelingAgent(config)"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "060a41a1",
   "metadata": {},
   "source": [
    "Below is a small, manually collected dataset of national parks and webpages containing information about them. We intend to use the LLM to find the state, which may be buried in different parts of each webpage."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "5b79df29",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "df = pd.DataFrame([\n",
    "    {\n",
    "        \"url\": \"https://www.visitmt.com/places-to-go/glacier-national-park\",\n",
    "        \"name\": \"Glacier National Park\",\n",
    "    },\n",
    "    {\n",
    "        \"url\": \"https://www.nps.gov/dena/index.htm\",\n",
    "        \"name\": \"Denali National Park\",\n",
    "    },\n",
    "    {\n",
    "        \"url\": \"https://www.nps.gov/lavo/index.htm\",\n",
    "        \"name\": \"Lassen Volcanic National Park\",\n",
    "    },\n",
    "    {\n",
    "        \"url\": \"https://www.nps.gov/olym/index.htm\",\n",
    "        \"name\": \"Olympic National Park\",\n",
    "    },\n",
    "    {\n",
    "        \"url\": \"https://www.nps.gov/pinn/index.htm\",\n",
    "        \"name\": \"Pinnacles National Park\",\n",
    "    },\n",
    "])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "1d3476bd",
   "metadata": {},
   "outputs": [],
   "source": [
    "ds = AutolabelDataset(df, config)"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "15695b36",
   "metadata": {},
   "source": [
    "## Running the transform\n",
    "First, we call `agent.transform` to run the webpage transformation and populate the \"content\" column of the dataset."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "55bda1eb",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "036ebf6ac47444b0913f13e830b6e607",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Output()"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"></pre>\n"
      ],
      "text/plain": []
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">\n",
       "</pre>\n"
      ],
      "text/plain": [
       "\n"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "ds = agent.transform(ds)"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "1dce5e1d",
   "metadata": {},
   "source": [
    "## Running the labeling function\n",
    "Now, we send the content of the webpage along with the question to the LLM in order to get the state of the national park."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "f513a335",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "6b7767b01e3c4f31b615e9ad9a0729e4",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Output()"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"></pre>\n"
      ],
      "text/plain": []
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "ds = agent.run(ds)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "a9eafc65",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>url</th>\n",
       "      <th>name</th>\n",
       "      <th>content</th>\n",
       "      <th>content_in_bytes_column</th>\n",
       "      <th>soup_column</th>\n",
       "      <th>metadata_column</th>\n",
       "      <th>NationalPark_label</th>\n",
       "      <th>NationalPark_error</th>\n",
       "      <th>NationalPark_successfully_labeled</th>\n",
       "      <th>NationalPark_annotation</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>https://www.visitmt.com/places-to-go/glacier-n...</td>\n",
       "      <td>Glacier National Park</td>\n",
       "      <td>\\n\\n\\n\\n\\n\\n\\nGlacier National Park\\n\\n\\n\\n\\n\\...</td>\n",
       "      <td>b'\\n&lt;!doctype html&gt;\\n  &lt;html lang=\"en\"&gt;\\n&lt;head...</td>\n",
       "      <td>[\\n, html, \\n, [\\n, [\\n,  Google Tag Manager ,...</td>\n",
       "      <td>{'url': 'https://www.visitmt.com/places-to-go/...</td>\n",
       "      <td>Montana</td>\n",
       "      <td>None</td>\n",
       "      <td>True</td>\n",
       "      <td>b'\\x80\\x04\\x95q\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x8...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>https://www.nps.gov/dena/index.htm</td>\n",
       "      <td>Denali National Park</td>\n",
       "      <td>\\n   Denali National Park &amp; Preserve (U.S. N...</td>\n",
       "      <td>b'&lt;!doctype html&gt; &lt;html lang=\"en\" class=\"no-js...</td>\n",
       "      <td>[html, \\n, [ ,  Content Copyright National Par...</td>\n",
       "      <td>{'url': 'https://www.nps.gov/dena/index.htm', ...</td>\n",
       "      <td>Alaska</td>\n",
       "      <td>None</td>\n",
       "      <td>True</td>\n",
       "      <td>b'\\x80\\x04\\x95p\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x8...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>https://www.nps.gov/lavo/index.htm</td>\n",
       "      <td>Lassen Volcanic National Park</td>\n",
       "      <td>\\n   Lassen Volcanic National Park (U.S. Nat...</td>\n",
       "      <td>b'&lt;!doctype html&gt; &lt;html lang=\"en\" class=\"no-js...</td>\n",
       "      <td>[html, \\n, [ ,  Content Copyright National Par...</td>\n",
       "      <td>{'url': 'https://www.nps.gov/lavo/index.htm', ...</td>\n",
       "      <td>California</td>\n",
       "      <td>None</td>\n",
       "      <td>True</td>\n",
       "      <td>b'\\x80\\x04\\x95t\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x8...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>https://www.nps.gov/olym/index.htm</td>\n",
       "      <td>Olympic National Park</td>\n",
       "      <td>\\n   Olympic National Park (U.S. National Pa...</td>\n",
       "      <td>b'&lt;!doctype html&gt; &lt;html lang=\"en\" class=\"no-js...</td>\n",
       "      <td>[html, \\n, [ ,  Content Copyright National Par...</td>\n",
       "      <td>{'url': 'https://www.nps.gov/olym/index.htm', ...</td>\n",
       "      <td>Washington</td>\n",
       "      <td>None</td>\n",
       "      <td>True</td>\n",
       "      <td>b'\\x80\\x04\\x95t\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x8...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>https://www.nps.gov/pinn/index.htm</td>\n",
       "      <td>Pinnacles National Park</td>\n",
       "      <td>\\n   Pinnacles National Park (U.S. National ...</td>\n",
       "      <td>b'&lt;!doctype html&gt; &lt;html lang=\"en\" class=\"no-js...</td>\n",
       "      <td>[html, \\n, [ ,  Content Copyright National Par...</td>\n",
       "      <td>{'url': 'https://www.nps.gov/pinn/index.htm', ...</td>\n",
       "      <td>California</td>\n",
       "      <td>None</td>\n",
       "      <td>True</td>\n",
       "      <td>b'\\x80\\x04\\x95t\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x8...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                 url  \\\n",
       "0  https://www.visitmt.com/places-to-go/glacier-n...   \n",
       "1                 https://www.nps.gov/dena/index.htm   \n",
       "2                 https://www.nps.gov/lavo/index.htm   \n",
       "3                 https://www.nps.gov/olym/index.htm   \n",
       "4                 https://www.nps.gov/pinn/index.htm   \n",
       "\n",
       "                            name  \\\n",
       "0          Glacier National Park   \n",
       "1           Denali National Park   \n",
       "2  Lassen Volcanic National Park   \n",
       "3          Olympic National Park   \n",
       "4        Pinnacles National Park   \n",
       "\n",
       "                                             content  \\\n",
       "0  \\n\\n\\n\\n\\n\\n\\nGlacier National Park\\n\\n\\n\\n\\n\\...   \n",
       "1    \\n   Denali National Park & Preserve (U.S. N...   \n",
       "2    \\n   Lassen Volcanic National Park (U.S. Nat...   \n",
       "3    \\n   Olympic National Park (U.S. National Pa...   \n",
       "4    \\n   Pinnacles National Park (U.S. National ...   \n",
       "\n",
       "                             content_in_bytes_column  \\\n",
       "0  b'\\n<!doctype html>\\n  <html lang=\"en\">\\n<head...   \n",
       "1  b'<!doctype html> <html lang=\"en\" class=\"no-js...   \n",
       "2  b'<!doctype html> <html lang=\"en\" class=\"no-js...   \n",
       "3  b'<!doctype html> <html lang=\"en\" class=\"no-js...   \n",
       "4  b'<!doctype html> <html lang=\"en\" class=\"no-js...   \n",
       "\n",
       "                                         soup_column  \\\n",
       "0  [\\n, html, \\n, [\\n, [\\n,  Google Tag Manager ,...   \n",
       "1  [html, \\n, [ ,  Content Copyright National Par...   \n",
       "2  [html, \\n, [ ,  Content Copyright National Par...   \n",
       "3  [html, \\n, [ ,  Content Copyright National Par...   \n",
       "4  [html, \\n, [ ,  Content Copyright National Par...   \n",
       "\n",
       "                                     metadata_column NationalPark_label  \\\n",
       "0  {'url': 'https://www.visitmt.com/places-to-go/...            Montana   \n",
       "1  {'url': 'https://www.nps.gov/dena/index.htm', ...             Alaska   \n",
       "2  {'url': 'https://www.nps.gov/lavo/index.htm', ...         California   \n",
       "3  {'url': 'https://www.nps.gov/olym/index.htm', ...         Washington   \n",
       "4  {'url': 'https://www.nps.gov/pinn/index.htm', ...         California   \n",
       "\n",
       "  NationalPark_error  NationalPark_successfully_labeled  \\\n",
       "0               None                               True   \n",
       "1               None                               True   \n",
       "2               None                               True   \n",
       "3               None                               True   \n",
       "4               None                               True   \n",
       "\n",
       "                             NationalPark_annotation  \n",
       "0  b'\\x80\\x04\\x95q\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x8...  \n",
       "1  b'\\x80\\x04\\x95p\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x8...  \n",
       "2  b'\\x80\\x04\\x95t\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x8...  \n",
       "3  b'\\x80\\x04\\x95t\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x8...  \n",
       "4  b'\\x80\\x04\\x95t\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x8...  "
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ds.df"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
